From 8511f9f15c3b878770e9ba7a7bc0a66b11c46a5e Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Fri, 25 Feb 2005 18:37:31 +0000 Subject: [PATCH] bitkeeper revision 1.1236.1.27 (421f706biBboh8DlmOttNIpUogeM6Q) A few fixes, and DOM0 by default is now allocated all of memory at boot time. Signed-off-by: Keir Fraser --- .rootkeys | 3 +- xen/arch/x86/boot/x86_32.S | 4 +- xen/arch/x86/boot/x86_64.S | 4 +- xen/arch/x86/{x86_64 => }/domain_build.c | 275 ++++++++++----- xen/arch/x86/setup.c | 20 +- xen/arch/x86/x86_32/domain_build.c | 416 ----------------------- xen/arch/x86/x86_32/domain_page.c | 10 +- xen/common/page_alloc.c | 4 +- xen/drivers/char/console.c | 2 + xen/include/asm-x86/shadow.h | 20 +- xen/include/xen/sched.h | 11 +- 11 files changed, 229 insertions(+), 540 deletions(-) rename xen/arch/x86/{x86_64 => }/domain_build.c (65%) delete mode 100644 xen/arch/x86/x86_32/domain_build.c diff --git a/.rootkeys b/.rootkeys index 996aac70b0..317f537b0f 100644 --- a/.rootkeys +++ b/.rootkeys @@ -946,6 +946,7 @@ 3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c 40e34414WiQO4h2m3tcpaCPn7SyYyg xen/arch/x86/dom0_ops.c 3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/domain.c +4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/domain_build.c 41d3eaae6GSDo3ZJDfK3nvQsJux-PQ xen/arch/x86/e820.c 3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c 3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c @@ -984,7 +985,6 @@ 41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c 419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c 4107c15e_NqNYew2EXroXz2mgTAMWQ xen/arch/x86/x86_32/call_with_regs.S -4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/x86_32/domain_build.c 3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c 3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S 3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c @@ -993,7 +993,6 @@ 3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c 3ddb79bcOMCu9-5mKpjIh5d0qqBDPg 
xen/arch/x86/x86_32/xen.lds 41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c -4202391dA91ZovYX9d_5zJi9yGvLoQ xen/arch/x86/x86_64/domain_build.c 40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S 41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c 42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S index 3ed99b6f4c..bc20d62fc3 100644 --- a/xen/arch/x86/boot/x86_32.S +++ b/xen/arch/x86/boot/x86_32.S @@ -15,9 +15,9 @@ ENTRY(start) /* Magic number indicating a Multiboot header. */ .long 0x1BADB002 /* Flags to bootloader (see Multiboot spec). */ - .long 0x00000002 + .long 0x00000003 /* Checksum: must be the negated sum of the first two fields. */ - .long -0x1BADB004 + .long -0x1BADB005 bad_cpu_msg: .asciz "ERR: Not a P6-compatible CPU!" diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S index ab729da994..b4f923f15a 100644 --- a/xen/arch/x86/boot/x86_64.S +++ b/xen/arch/x86/boot/x86_64.S @@ -16,9 +16,9 @@ ENTRY(start) /* Magic number indicating a Multiboot header. */ .long 0x1BADB002 /* Flags to bootloader (see Multiboot spec). */ - .long 0x00000002 + .long 0x00000003 /* Checksum: must be the negated sum of the first two fields. */ - .long -0x1BADB004 + .long -0x1BADB005 .org 0x010 .asciz "ERR: Not a 64-bit CPU!" diff --git a/xen/arch/x86/x86_64/domain_build.c b/xen/arch/x86/domain_build.c similarity index 65% rename from xen/arch/x86/x86_64/domain_build.c rename to xen/arch/x86/domain_build.c index be7fc6c8c8..802fb6d8c0 100644 --- a/xen/arch/x86/x86_64/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -10,19 +10,28 @@ #include #include #include +#include +#include +#include #include #include #include #include -#include #include #include -#include -#include -#include +#include + +/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. 
*/ +static unsigned int opt_dom0_mem = 0; +integer_param("dom0_mem", opt_dom0_mem); +#if defined(__i386__) +/* No ring-3 access in initial leaf page tables. */ +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#elif defined(__x86_64__) /* Allow ring-3 access in long mode as guest cannot use ring 1. */ #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#endif #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) @@ -30,9 +39,19 @@ #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) #define round_pgdown(_p) ((_p)&PAGE_MASK) +static struct pfn_info *alloc_largest(struct domain *d, unsigned long max) +{ + struct pfn_info *page; + unsigned int order = get_order(max * PAGE_SIZE); + if ( (max & (max-1)) != 0 ) + order--; + while ( (page = alloc_domheap_pages(d, order)) == NULL ) + if ( order-- == 0 ) + break; + return page; +} + int construct_dom0(struct domain *d, - unsigned long alloc_start, - unsigned long alloc_end, unsigned long _image_start, unsigned long image_len, unsigned long _initrd_start, unsigned long initrd_len, char *cmdline) @@ -40,18 +59,25 @@ int construct_dom0(struct domain *d, char *dst; int i, rc; unsigned long pfn, mfn; - unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT; + unsigned long nr_pages; unsigned long nr_pt_pages; + unsigned long alloc_start; + unsigned long alloc_end; unsigned long count; - l4_pgentry_t *l4tab = NULL, *l4start = NULL; - l3_pgentry_t *l3tab = NULL, *l3start = NULL; - l2_pgentry_t *l2tab = NULL, *l2start = NULL; - l1_pgentry_t *l1tab = NULL, *l1start = NULL; struct pfn_info *page = NULL; start_info_t *si; struct exec_domain *ed = d->exec_domain[0]; +#if defined(__i386__) + char *image_start = (char *)_image_start; /* use lowmem mappings */ + char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */ +#elif defined(__x86_64__) 
char *image_start = __va(_image_start); char *initrd_start = __va(_initrd_start); + l4_pgentry_t *l4tab = NULL, *l4start = NULL; + l3_pgentry_t *l3tab = NULL, *l3start = NULL; +#endif + l2_pgentry_t *l2tab = NULL, *l2start = NULL; + l1_pgentry_t *l1tab = NULL, *l1start = NULL; /* * This fully describes the memory layout of the initial domain. All @@ -86,18 +112,17 @@ int construct_dom0(struct domain *d, printk("*** LOADING DOMAIN 0 ***\n"); - /* - * This is all a bit grim. We've moved the modules to the "safe" physical - * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this - * routine we're going to copy it down into the region that's actually - * been allocated to domain 0. This is highly likely to be overlapping, so - * we use a forward copy. - * - * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with - * 4GB and lots of network/disk cards that allocate loads of buffers. - * We'll have to revisit this if we ever support PAE (64GB). - */ - + /* By default DOM0 is allocated all available memory. */ + if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 ) + nr_pages = avail_domheap_pages() + + ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) + + ((image_len + PAGE_SIZE - 1) >> PAGE_SHIFT); + d->max_pages = nr_pages; + if ( (page = alloc_largest(d, nr_pages)) == NULL ) + panic("Not enough RAM for DOM0 reservation.\n"); + alloc_start = page_to_phys(page); + alloc_end = alloc_start + (d->tot_pages << PAGE_SHIFT); + rc = parseelfimage(image_start, image_len, &dsi); if ( rc != 0 ) return rc; @@ -131,6 +156,11 @@ int construct_dom0(struct domain *d, v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); if ( (v_end - vstack_end) < (512UL << 10) ) v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. 
*/ +#if defined(__i386__) + if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> + L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) + break; +#elif defined(__x86_64__) #define NR(_l,_h,_s) \ (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \ ((_l) & ~((1UL<<(_s))-1))) >> (_s)) @@ -140,15 +170,12 @@ int construct_dom0(struct domain *d, NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */ <= nr_pt_pages ) break; +#endif } - printk("PHYSICAL MEMORY ARRANGEMENT:\n" - " Kernel image: %p->%p\n" - " Initrd image: %p->%p\n" - " Dom0 alloc.: %p->%p\n", - _image_start, _image_start + image_len, - _initrd_start, _initrd_start + initrd_len, - alloc_start, alloc_end); + if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) ) + panic("Insufficient contiguous RAM to build kernel image.\n"); + printk("VIRTUAL MEMORY ARRANGEMENT:\n" " Loaded kernel: %p->%p\n" " Init. ramdisk: %p->%p\n" @@ -174,48 +201,6 @@ int construct_dom0(struct domain *d, return -ENOMEM; } - /* Overlap with Xen protected area? */ - if ( (dsi.v_start < HYPERVISOR_VIRT_END) && - (v_end > HYPERVISOR_VIRT_START) ) - { - printk("DOM0 image overlaps with Xen private area.\n"); - return -EINVAL; - } - - /* Paranoia: scrub DOM0's memory allocation. */ - printk("Scrubbing DOM0 RAM: "); - dst = __va(alloc_start); - while ( __pa(dst) < alloc_end ) - { -#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */ - printk("."); - touch_nmi_watchdog(); - if ( (alloc_end - __pa(dst)) > SCRUB_BYTES ) - { - memset(dst, 0, SCRUB_BYTES); - dst += SCRUB_BYTES; - } - else - { - memset(dst, 0, alloc_end - __pa(dst)); - break; - } - } - printk("done.\n"); - - /* Construct a frame-allocation list for the initial domain. 
*/ - for ( mfn = (alloc_start>>PAGE_SHIFT); - mfn < (alloc_end>>PAGE_SHIFT); - mfn++ ) - { - page = &frame_table[mfn]; - page_set_owner(page, d); - page->u.inuse.type_info = 0; - page->count_info = PGC_allocated | 1; - list_add_tail(&page->list, &d->page_list); - d->tot_pages++; d->max_pages++; - } - mpt_alloc = (vpt_start - dsi.v_start) + alloc_start; SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES); @@ -231,6 +216,103 @@ int construct_dom0(struct domain *d, for ( i = 0; i < 256; i++ ) ed->arch.traps[i].cs = FLAT_KERNEL_CS; +#if defined(__i386__) + + /* + * Protect the lowest 1GB of memory. We use a temporary mapping there + * from which we copy the kernel and ramdisk images. + */ + if ( dsi.v_start < (1UL<<30) ) + { + printk("Initial loading isn't allowed to lowest 1GB of memory.\n"); + return -EINVAL; + } + + /* WARNING: The new domain must have its 'processor' field filled in! */ + l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; + memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE); + l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = + mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR); + l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = + mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR); + ed->arch.guest_table = mk_pagetable((unsigned long)l2start); + + l2tab += l2_table_offset(dsi.v_start); + mfn = alloc_start >> PAGE_SHIFT; + for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) + { + if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) + { + l1start = l1tab = (l1_pgentry_t *)mpt_alloc; + mpt_alloc += PAGE_SIZE; + *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT); + clear_page(l1tab); + if ( count == 0 ) + l1tab += l1_table_offset(dsi.v_start); + } + *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT); + + page = &frame_table[mfn]; + if ( !get_page_and_type(page, d, PGT_writable_page) ) + BUG(); + + mfn++; + } + + /* Pages that are part of page tables must be read only. 
*/ + l2tab = l2start + l2_table_offset(vpt_start); + l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); + l1tab += l1_table_offset(vpt_start); + for ( count = 0; count < nr_pt_pages; count++ ) + { + *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); + page = &frame_table[l1_pgentry_to_pfn(*l1tab)]; + if ( count == 0 ) + { + page->u.inuse.type_info &= ~PGT_type_mask; + page->u.inuse.type_info |= PGT_l2_page_table; + + /* + * No longer writable: decrement the type_count. + * Installed as CR3: increment both the ref_count and type_count. + * Net: just increment the ref_count. + */ + get_page(page, d); /* an extra ref because of readable mapping */ + + /* Get another ref to L2 page so that it can be pinned. */ + if ( !get_page_and_type(page, d, PGT_l2_page_table) ) + BUG(); + set_bit(_PGT_pinned, &page->u.inuse.type_info); + } + else + { + page->u.inuse.type_info &= ~PGT_type_mask; + page->u.inuse.type_info |= PGT_l1_page_table; + page->u.inuse.type_info |= + ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))< HYPERVISOR_VIRT_START) ) + { + printk("DOM0 image overlaps with Xen private area.\n"); + return -EINVAL; + } + /* WARNING: The new domain must have its 'processor' field filled in! */ phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; @@ -320,6 +402,8 @@ int construct_dom0(struct domain *d, } } +#endif /* __x86_64__ */ + /* Set up shared-info area. */ update_dom_time(d); d->shared_info->domain_time = 0; @@ -335,17 +419,23 @@ int construct_dom0(struct domain *d, __cli(); write_ptbase(ed); - /* Copy the OS image. */ + /* Copy the OS image and free temporary buffer. */ (void)loadelfimage(image_start); + init_domheap_pages( + _image_start, (_image_start+image_len+PAGE_SIZE-1) & PAGE_MASK); - /* Copy the initial ramdisk. */ + /* Copy the initial ramdisk and free temporary buffer. 
*/ if ( initrd_len != 0 ) + { memcpy((void *)vinitrd_start, initrd_start, initrd_len); + init_domheap_pages( + _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK); + } /* Set up start info area. */ si = (start_info_t *)vstartinfo_start; memset(si, 0, PAGE_SIZE); - si->nr_pages = d->tot_pages; + si->nr_pages = nr_pages; si->shared_info = virt_to_phys(d->shared_info); si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; si->pt_base = vpt_start; @@ -364,6 +454,22 @@ int construct_dom0(struct domain *d, ((u32 *)vphysmap_start)[pfn] = mfn; machine_to_phys_mapping[mfn] = pfn; } + while ( pfn < nr_pages ) + { + if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL ) + panic("Not enough RAM for DOM0 reservation.\n"); + while ( pfn < d->tot_pages ) + { + mfn = page_to_pfn(page); +#ifndef NDEBUG +#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT))) +#endif + ((u32 *)vphysmap_start)[pfn] = mfn; + machine_to_phys_mapping[mfn] = pfn; +#undef pfn + page++; pfn++; + } + } if ( initrd_len != 0 ) { @@ -389,6 +495,14 @@ int construct_dom0(struct domain *d, write_ptbase(current); __sti(); +#if defined(__i386__) + /* Destroy low mappings - they were only for our convenience. */ + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) + if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE ) + l2start[i] = mk_l2_pgentry(0); + zap_low_mappings(); /* Do the same for the idle page tables. */ +#endif + /* DOM0 gets access to everything. 
*/ physdev_init_dom0(d); @@ -402,12 +516,17 @@ int construct_dom0(struct domain *d, int elf_sanity_check(Elf_Ehdr *ehdr) { if ( !IS_ELF(*ehdr) || +#if defined(__i386__) + (ehdr->e_ident[EI_CLASS] != ELFCLASS32) || + (ehdr->e_machine != EM_386) || +#elif defined(__x86_64__) (ehdr->e_ident[EI_CLASS] != ELFCLASS64) || + (ehdr->e_machine != EM_X86_64) || +#endif (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || - (ehdr->e_type != ET_EXEC) || - (ehdr->e_machine != EM_X86_64) ) + (ehdr->e_type != ET_EXEC) ) { - printk("DOM0 image is not x86/64-compatible executable Elf image.\n"); + printk("DOM0 image is not a Xen-compatible Elf image.\n"); return 0; } diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index b2e36a64dd..1b6ffd6717 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -20,10 +20,6 @@ #include #include -/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */ -static unsigned int opt_dom0_mem = 16000; -integer_param("dom0_mem", opt_dom0_mem); - /* * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the * pfn_info table and allocation bitmap. @@ -463,7 +459,6 @@ void __init __start_xen(multiboot_info_t *mbi) module_t *mod = (module_t *)__va(mbi->mods_addr); void *heap_start; unsigned long firsthole_start, nr_pages; - unsigned long dom0_memory_start, dom0_memory_end; unsigned long initial_images_start, initial_images_end; struct e820entry e820_raw[E820MAX]; int i, e820_raw_nr = 0, bytes = 0; @@ -567,15 +562,6 @@ void __init __start_xen(multiboot_info_t *mbi) nr_pages >> (20 - PAGE_SHIFT), nr_pages << (PAGE_SHIFT - 10)); - /* Allocate an aligned chunk of RAM for DOM0. 
*/ - dom0_memory_start = alloc_boot_pages(opt_dom0_mem << 10, 4UL << 20); - dom0_memory_end = dom0_memory_start + (opt_dom0_mem << 10); - if ( dom0_memory_start == 0 ) - { - printk("Not enough memory for DOM0 memory reservation.\n"); - for ( ; ; ) ; - } - init_frametable(); end_boot_allocator(); @@ -613,7 +599,7 @@ void __init __start_xen(multiboot_info_t *mbi) * We're going to setup domain0 using the module(s) that we stashed safely * above our heap. The second module, if present, is an initrd ramdisk. */ - if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end, + if ( construct_dom0(dom0, initial_images_start, mod[0].mod_end-mod[0].mod_start, (mbi->mods_count == 1) ? 0 : @@ -624,9 +610,7 @@ void __init __start_xen(multiboot_info_t *mbi) cmdline) != 0) panic("Could not set up DOM0 guest OS\n"); - /* The stash space for the initial kernel image can now be freed up. */ - init_domheap_pages(initial_images_start, initial_images_end); - + /* Scrub RAM that is still free and so may go to an unprivileged domain. */ scrub_heap_pages(); init_trace_bufs(); diff --git a/xen/arch/x86/x86_32/domain_build.c b/xen/arch/x86/x86_32/domain_build.c deleted file mode 100644 index d7cdc892c1..0000000000 --- a/xen/arch/x86/x86_32/domain_build.c +++ /dev/null @@ -1,416 +0,0 @@ -/****************************************************************************** - * domain_build.c - * - * Copyright (c) 2002-2005, K A Fraser - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* No ring-3 access in initial page tables. 
*/ -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) - -#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) -#define round_pgdown(_p) ((_p)&PAGE_MASK) - -int construct_dom0(struct domain *d, - unsigned long alloc_start, - unsigned long alloc_end, - unsigned long _image_start, unsigned long image_len, - unsigned long _initrd_start, unsigned long initrd_len, - char *cmdline) -{ - char *dst; - int i, rc; - unsigned long pfn, mfn; - unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT; - unsigned long nr_pt_pages; - unsigned long count; - l2_pgentry_t *l2tab, *l2start; - l1_pgentry_t *l1tab = NULL, *l1start = NULL; - struct pfn_info *page = NULL; - start_info_t *si; - struct exec_domain *ed = d->exec_domain[0]; - char *image_start = (char *)_image_start; /* use lowmem mappings */ - char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */ - - /* - * This fully describes the memory layout of the initial domain. All - * *_start address are page-aligned, except v_start (and v_end) which are - * superpage-aligned. - */ - struct domain_setup_info dsi; - unsigned long vinitrd_start; - unsigned long vinitrd_end; - unsigned long vphysmap_start; - unsigned long vphysmap_end; - unsigned long vstartinfo_start; - unsigned long vstartinfo_end; - unsigned long vstack_start; - unsigned long vstack_end; - unsigned long vpt_start; - unsigned long vpt_end; - unsigned long v_end; - - /* Machine address of next candidate page-table page. */ - unsigned long mpt_alloc; - - extern void physdev_init_dom0(struct domain *); - - /* Sanity! */ - if ( d->id != 0 ) - BUG(); - if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) - BUG(); - - memset(&dsi, 0, sizeof(struct domain_setup_info)); - - printk("*** LOADING DOMAIN 0 ***\n"); - - /* - * This is all a bit grim. We've moved the modules to the "safe" physical - * memory region above MAP_DIRECTMAP_ADDRESS (48MB). 
Later in this - * routine we're going to copy it down into the region that's actually - * been allocated to domain 0. This is highly likely to be overlapping, so - * we use a forward copy. - * - * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with - * 4GB and lots of network/disk cards that allocate loads of buffers. - * We'll have to revisit this if we ever support PAE (64GB). - */ - - rc = parseelfimage(image_start, image_len, &dsi); - if ( rc != 0 ) - return rc; - - /* Set up domain options */ - if ( dsi.use_writable_pagetables ) - vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - - /* Align load address to 4MB boundary. */ - dsi.v_start &= ~((1UL<<22)-1); - - /* - * Why do we need this? The number of page-table frames depends on the - * size of the bootstrap address space. But the size of the address space - * depends on the number of page-table frames (since each one is mapped - * read-only). We have a pair of simultaneous equations in two unknowns, - * which we solve by exhaustive search. - */ - vinitrd_start = round_pgup(dsi.v_kernend); - vinitrd_end = vinitrd_start + initrd_len; - vphysmap_start = round_pgup(vinitrd_end); - vphysmap_end = vphysmap_start + (nr_pages * sizeof(u32)); - vpt_start = round_pgup(vphysmap_end); - for ( nr_pt_pages = 2; ; nr_pt_pages++ ) - { - vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); - vstartinfo_start = vpt_end; - vstartinfo_end = vstartinfo_start + PAGE_SIZE; - vstack_start = vstartinfo_end; - vstack_end = vstack_start + PAGE_SIZE; - v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); - if ( (v_end - vstack_end) < (512UL << 10) ) - v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. 
*/ - if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> - L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) - break; - } - - printk("PHYSICAL MEMORY ARRANGEMENT:\n" - " Kernel image: %p->%p\n" - " Initrd image: %p->%p\n" - " Dom0 alloc.: %p->%p\n", - _image_start, _image_start + image_len, - _initrd_start, _initrd_start + initrd_len, - alloc_start, alloc_end); - printk("VIRTUAL MEMORY ARRANGEMENT:\n" - " Loaded kernel: %p->%p\n" - " Init. ramdisk: %p->%p\n" - " Phys-Mach map: %p->%p\n" - " Page tables: %p->%p\n" - " Start info: %p->%p\n" - " Boot stack: %p->%p\n" - " TOTAL: %p->%p\n", - dsi.v_kernstart, dsi.v_kernend, - vinitrd_start, vinitrd_end, - vphysmap_start, vphysmap_end, - vpt_start, vpt_end, - vstartinfo_start, vstartinfo_end, - vstack_start, vstack_end, - dsi.v_start, v_end); - printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry); - - if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) ) - { - printk("Initial guest OS requires too much space\n" - "(%luMB is greater than %luMB limit)\n", - (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20); - return -ENOMEM; - } - - /* - * Protect the lowest 1GB of memory. We use a temporary mapping there - * from which we copy the kernel and ramdisk images. - */ - if ( dsi.v_start < (1UL<<30) ) - { - printk("Initial loading isn't allowed to lowest 1GB of memory.\n"); - return -EINVAL; - } - - /* Paranoia: scrub DOM0's memory allocation. */ - printk("Scrubbing DOM0 RAM: "); - dst = (char *)alloc_start; - while ( dst < (char *)alloc_end ) - { -#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */ - printk("."); - touch_nmi_watchdog(); - if ( ((char *)alloc_end - dst) > SCRUB_BYTES ) - { - memset(dst, 0, SCRUB_BYTES); - dst += SCRUB_BYTES; - } - else - { - memset(dst, 0, (char *)alloc_end - dst); - break; - } - } - printk("done.\n"); - - /* Construct a frame-allocation list for the initial domain. 
*/ - for ( mfn = (alloc_start>>PAGE_SHIFT); - mfn < (alloc_end>>PAGE_SHIFT); - mfn++ ) - { - page = &frame_table[mfn]; - page_set_owner(page, d); - page->u.inuse.type_info = 0; - page->count_info = PGC_allocated | 1; - list_add_tail(&page->list, &d->page_list); - d->tot_pages++; d->max_pages++; - } - - mpt_alloc = (vpt_start - dsi.v_start) + alloc_start; - - SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES); - SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS); - - /* - * We're basically forcing default RPLs to 1, so that our "what privilege - * level are we returning to?" logic works. - */ - ed->arch.failsafe_selector = FLAT_KERNEL_CS; - ed->arch.event_selector = FLAT_KERNEL_CS; - ed->arch.kernel_ss = FLAT_KERNEL_SS; - for ( i = 0; i < 256; i++ ) - ed->arch.traps[i].cs = FLAT_KERNEL_CS; - - /* WARNING: The new domain must have its 'processor' field filled in! */ - l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; - memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE); - l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR); - l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR); - ed->arch.guest_table = mk_pagetable((unsigned long)l2start); - - l2tab += l2_table_offset(dsi.v_start); - mfn = alloc_start >> PAGE_SHIFT; - for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) - { - if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) - { - l1start = l1tab = (l1_pgentry_t *)mpt_alloc; - mpt_alloc += PAGE_SIZE; - *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT); - clear_page(l1tab); - if ( count == 0 ) - l1tab += l1_table_offset(dsi.v_start); - } - *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT); - - page = &frame_table[mfn]; - if ( !get_page_and_type(page, d, PGT_writable_page) ) - BUG(); - - mfn++; - } - - /* Pages that are part of page tables must be read only. 
*/ - l2tab = l2start + l2_table_offset(vpt_start); - l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); - l1tab += l1_table_offset(vpt_start); - for ( count = 0; count < nr_pt_pages; count++ ) - { - *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); - page = &frame_table[l1_pgentry_to_pfn(*l1tab)]; - if ( count == 0 ) - { - page->u.inuse.type_info &= ~PGT_type_mask; - page->u.inuse.type_info |= PGT_l2_page_table; - - /* - * No longer writable: decrement the type_count. - * Installed as CR3: increment both the ref_count and type_count. - * Net: just increment the ref_count. - */ - get_page(page, d); /* an extra ref because of readable mapping */ - - /* Get another ref to L2 page so that it can be pinned. */ - if ( !get_page_and_type(page, d, PGT_l2_page_table) ) - BUG(); - set_bit(_PGT_pinned, &page->u.inuse.type_info); - } - else - { - page->u.inuse.type_info &= ~PGT_type_mask; - page->u.inuse.type_info |= PGT_l1_page_table; - page->u.inuse.type_info |= - ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<shared_info->domain_time = 0; - /* Mask all upcalls... */ - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; - d->shared_info->n_vcpu = smp_num_cpus; - - /* setup shadow and monitor tables */ - update_pagetables(ed); - - /* Install the new page tables. */ - __cli(); - write_ptbase(ed); - - /* Copy the OS image. */ - (void)loadelfimage(image_start); - - /* Copy the initial ramdisk. */ - if ( initrd_len != 0 ) - memcpy((void *)vinitrd_start, initrd_start, initrd_len); - - /* Set up start info area. */ - si = (start_info_t *)vstartinfo_start; - memset(si, 0, PAGE_SIZE); - si->nr_pages = d->tot_pages; - si->shared_info = virt_to_phys(d->shared_info); - si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; - si->pt_base = vpt_start; - si->nr_pt_frames = nr_pt_pages; - si->mfn_list = vphysmap_start; - - /* Write the phys->machine and machine->phys table entries. 
*/ - for ( pfn = 0; pfn < d->tot_pages; pfn++ ) - { - mfn = pfn + (alloc_start>>PAGE_SHIFT); -#ifndef NDEBUG -#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT) - if ( pfn > REVERSE_START ) - mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START); -#endif - ((u32 *)vphysmap_start)[pfn] = mfn; - machine_to_phys_mapping[mfn] = pfn; - } - - if ( initrd_len != 0 ) - { - si->mod_start = vinitrd_start; - si->mod_len = initrd_len; - printk("Initrd len 0x%lx, start at 0x%p\n", - si->mod_len, si->mod_start); - } - - dst = si->cmd_line; - if ( cmdline != NULL ) - { - for ( i = 0; i < 255; i++ ) - { - if ( cmdline[i] == '\0' ) - break; - *dst++ = cmdline[i]; - } - } - *dst = '\0'; - - /* Reinstate the caller's page tables. */ - write_ptbase(current); - __sti(); - - /* Destroy low mappings - they were only for our convenience. */ - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE ) - l2start[i] = mk_l2_pgentry(0); - zap_low_mappings(); /* Do the same for the idle page tables. */ - - /* DOM0 gets access to everything. */ - physdev_init_dom0(d); - - set_bit(DF_CONSTRUCTED, &d->d_flags); - - new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start); - -#ifndef NDEBUG - if (0) /* XXXXX DO NOT CHECK IN ENABLED !!! 
(but useful for testing so leave) */ - { - shadow_mode_enable(d, SHM_enable); - update_pagetables(ed); /* XXX SMP */ - } -#endif - - return 0; -} - -int elf_sanity_check(Elf_Ehdr *ehdr) -{ - if ( !IS_ELF(*ehdr) || - (ehdr->e_ident[EI_CLASS] != ELFCLASS32) || - (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || - (ehdr->e_type != ET_EXEC) || - (ehdr->e_machine != EM_386) ) - { - printk("DOM0 image is not i386-compatible executable Elf image.\n"); - return 0; - } - - return 1; -} - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - */ diff --git a/xen/arch/x86/x86_32/domain_page.c b/xen/arch/x86/x86_32/domain_page.c index e3aa720c78..d3ec56cd17 100644 --- a/xen/arch/x86/x86_32/domain_page.c +++ b/xen/arch/x86/x86_32/domain_page.c @@ -45,7 +45,7 @@ void *map_domain_mem(unsigned long pa) unsigned int idx, cpu = smp_processor_id(); unsigned long *cache = mapcache; #ifndef NDEBUG - unsigned flush_count = 0; + unsigned int flush_count = 0; #endif ASSERT(!in_irq()); @@ -65,17 +65,11 @@ void *map_domain_mem(unsigned long pa) idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1); if ( unlikely(idx == 0) ) { + ASSERT(flush_count++ == 0); flush_all_ready_maps(); perfc_incrc(domain_page_tlb_flush); local_flush_tlb(); shadow_epoch[cpu] = ++epoch; -#ifndef NDEBUG - if ( unlikely(flush_count++) ) - { - // we've run out of map cache entries... - BUG(); - } -#endif } } while ( cache[idx] != 0 ); diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 7a727f1251..61b4b2b4a8 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -203,8 +203,8 @@ unsigned long alloc_boot_pages(unsigned long size, unsigned long align) #define MEMZONE_DOM 1 #define NR_ZONES 2 -/* Up to 2^10 pages can be allocated at once. */ -#define MAX_ORDER 10 +/* Up to 2^20 pages can be allocated at once. 
*/ +#define MAX_ORDER 20 static struct list_head heap[NR_ZONES][MAX_ORDER+1]; static unsigned long avail[NR_ZONES]; diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index 03314d6793..dfdb1d7a2e 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -577,6 +577,8 @@ static int __init debugtrace_init(void) debugtrace_buf = (unsigned char *)alloc_xenheap_pages(order); ASSERT(debugtrace_buf != NULL); + memset(debugtrace_buf, '\0', debugtrace_bytes); + return 0; } __initcall(debugtrace_init); diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index 670394ce60..2e364eacc8 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -13,17 +13,20 @@ #define PSH_hl2 (1<<30) /* page is an hl2 */ #define PSH_pfn_mask ((1<<21)-1) -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */ - +/* Shadow PT operation mode: shadow-mode variable in arch_domain. */ #define SHM_enable (1<<0) /* we're in one of the shadow modes */ #define SHM_log_dirty (1<<1) /* enable log dirty mode */ -#define SHM_translate (1<<2) /* do p2m tranaltion on guest tables */ +#define SHM_translate (1<<2) /* do p2m translation on guest tables */ #define SHM_external (1<<3) /* external page table, not used by Xen */ #define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode) #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty) #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate) +#ifndef __x86_64__ /* XXX Currently breaks the 64-bit build. 
*/ #define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external) +#else +#define shadow_mode_external(_d) (0) +#endif #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START) #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \ @@ -804,6 +807,10 @@ static inline void update_pagetables(struct exec_domain *ed) if ( !shadow_mode_external(d) ) { + /* + * Internal page tables: + * No need to allocate a separate page table for Xen. + */ #ifdef __x86_64__ if ( !(ed->arch.flags & TF_kernel_mode) ) ed->arch.monitor_table = ed->arch.guest_table_user; @@ -816,9 +823,10 @@ static inline void update_pagetables(struct exec_domain *ed) } else { - // External page tables... - // Allocate a monitor page table if we don't already have one. - // + /* + * External page tables: + * Allocate a monitor page table if we don't already have one. + */ if ( unlikely(!pagetable_val(ed->arch.monitor_table)) ) ed->arch.monitor_table = mk_pagetable(alloc_monitor_pagetable(ed) << PAGE_SHIFT); diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 0d4b60ac4b..5e5bb690f0 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -215,12 +215,11 @@ static inline void get_knownalive_domain(struct domain *d) extern struct domain *do_createdomain( domid_t dom_id, unsigned int cpu); -extern int construct_dom0(struct domain *d, - unsigned long alloc_start, - unsigned long alloc_end, - unsigned long image_start, unsigned long image_len, - unsigned long initrd_start, unsigned long initrd_len, - char *cmdline); +extern int construct_dom0( + struct domain *d, + unsigned long image_start, unsigned long image_len, + unsigned long initrd_start, unsigned long initrd_len, + char *cmdline); extern int final_setup_guest(struct domain *d, dom0_builddomain_t *); struct domain *find_domain_by_id(domid_t dom); -- 2.30.2